This script creates a merged list of treatments from both CalMapper and PFIRS with: - duplicates removed from PFIRS, defined as within 1 km and 30 days from CalMapper record - years 2020-2022 - filtered to AEPP threshold - buffered overlay method to account for PFIRS being point data

Load data

file = "Rdata/CalMapper_activities_fire.Rdata"
load(file)
file = "Rdata/PFIRS_2017-2022_pull2023.Rdata"
load(file)

Filter to years 2020-2022

Check whether YEAR matches activity dates in CalMapper

cm %>% 
  group_by(YEAR) %>% 
  ggplot(aes(x = as.factor(YEAR), y = ACTIVITY_START))+
  geom_point(position = position_jitter())

cm %>% 
  group_by(YEAR) %>% 
  ggplot(aes(x = as.factor(YEAR), y = ACTIVITY_END))+
  geom_point(position = position_jitter())

Yup, it doesn’t look like there are issues

CalMapper

cm <- cm %>% 
  filter(YEAR %in% seq(2020, 2022))

PFIRS

pfirs %>% 
  group_by(Year) %>% 
  ggplot(aes(x = as.factor(Year), y = Burn_Date))+
  geom_point(position = position_jitter())

pfirs <- pfirs %>% 
  filter(Year %in% seq(2020, 2022))

Filter by size according to AERR thresholds

CalMapper

cm %>% 
  st_drop_geometry() %>% 
  group_by(ACTIVITY_DESCRIPTION) %>% 
  count()
## # A tibble: 3 × 2
## # Groups:   ACTIVITY_DESCRIPTION [3]
##   ACTIVITY_DESCRIPTION     n
##   <chr>                <int>
## 1 Broadcast Burn         626
## 2 Cultural Burning         4
## 3 Pile Burning          1445
nrow_old <- nrow(cm)

cm <- cm %>% 
  filter((ACTIVITY_DESCRIPTION == "Pile Burning" & TREATED_ACRES >= 25) | (ACTIVITY_DESCRIPTION %in% c("Broadcast Burn", "Cultural Burning") & TREATED_ACRES >= 50))

deleted_n <- nrow_old - nrow(cm)
print(paste("Deleted", deleted_n, "records below the size threshold"))
## [1] "Deleted 1689 records below the size threshold"

PFIRS

pfirs %>% 
  st_drop_geometry() %>% 
  group_by(Burn_Type) %>% 
  count()
## # A tibble: 11 × 2
## # Groups:   Burn_Type [11]
##    Burn_Type               n
##    <chr>               <int>
##  1 Broadcast            1526
##  2 Hand Pile            3087
##  3 Hand Piles             43
##  4 Landing Pile          226
##  5 Landing Piles          27
##  6 Machine Pile         1484
##  7 Machine Piles           2
##  8 Multiple Fuel Types    61
##  9 Multiple Fuels         46
## 10 UNK                    41
## 11 Unknown               329

Add a column for “pile” or “broadcast” since the burn type column is messy

pfirs <- pfirs %>% 
  mutate(Burn_Type_Simple = case_when(
    Burn_Type %in% c("Hand Pile", "Hand Piles", "Landing Pile", "Landing Piles", "Machine Pile", "Machine Piles") ~ "Pile", 
    Burn_Type == "Broadcast" ~ "Broadcast",
    Burn_Type %in% c("Multiple Fuel Types", "Multiple Fuels", "UNK", "Unknown") ~ "Unknown/Multiple", 
    TRUE ~ NA
  ))
pfirs %>% 
  st_drop_geometry() %>% 
  group_by(Burn_Type, Burn_Type_Simple) %>% 
  count()
## # A tibble: 11 × 3
## # Groups:   Burn_Type, Burn_Type_Simple [11]
##    Burn_Type           Burn_Type_Simple     n
##    <chr>               <chr>            <int>
##  1 Broadcast           Broadcast         1526
##  2 Hand Pile           Pile              3087
##  3 Hand Piles          Pile                43
##  4 Landing Pile        Pile               226
##  5 Landing Piles       Pile                27
##  6 Machine Pile        Pile              1484
##  7 Machine Piles       Pile                 2
##  8 Multiple Fuel Types Unknown/Multiple    61
##  9 Multiple Fuels      Unknown/Multiple    46
## 10 UNK                 Unknown/Multiple    41
## 11 Unknown             Unknown/Multiple   329

Filter to burns within the AERR size thresholds

nrow_old <- nrow(pfirs)

pfirs <- pfirs %>% 
  filter((Burn_Type_Simple == "Pile" & Acres_Burned >= 25) | (Burn_Type_Simple %in% c("Broadcast", "Unknown/Multiple") & Acres_Burned >= 50))

deleted_n <- nrow_old - nrow(pfirs)
print(paste("Deleted", deleted_n, "records below the size threshold"))
## [1] "Deleted 5617 records below the size threshold"

Sync up crs’s

if(object.size(cm) > object.size(pfirs)){
  print("CalMapper is more GB than PFIRS")
}
## [1] "CalMapper is more GB than PFIRS"
pfirs <- st_transform(pfirs, st_crs(cm))

Add ID column to PFIRS

pfirs <- pfirs %>% 
  mutate(ID_CT = seq(1, nrow(pfirs)))

Add Data_source column

pfirs <- pfirs %>% 
  mutate(Data_source = "PFIRS")
cm <- cm %>% 
  mutate(Data_source = "CalMapper")

Plot filtered data together

Convert year columns to factors

pfirs$Year <- as.factor(pfirs$Year)
cm$Year <- as.factor(cm$YEAR)
cm <- cm %>% 
  select(-YEAR)

Buffer PFIRS

pfirs_buff <- st_buffer(pfirs, dist = 1000)

Write

st_write(pfirs_buff, dsn = "shapefiles", layer = "pfirs_buffer", driver = "ESRI Shapefile", delete_layer = T)
## Warning in abbreviate_shapefile_names(obj): Field names abbreviated for ESRI
## Shapefile driver
## Deleting layer `pfirs_buffer' using driver `ESRI Shapefile'
## Writing layer `pfirs_buffer' to data source `shapefiles' using driver `ESRI Shapefile'
## Updating existing layer pfirs_buffer
## Writing 1255 features with 13 fields and geometry type Polygon.

Plot using mapview

st_crs(pfirs) == st_crs(cm)
## [1] TRUE
mapview(pfirs, zcol = "Year", col.regions = colors, alpha.regions = 0.5)+
  mapview(cm, zcol = "Year", col.regions = colors,  alpha.regions = 0.5)+
  mapview(pfirs_buff, zcol = "Year", col.regions = colors, alpha.regions = 0.3)

Plot without duplicates removed at all

Calculate agreage of each, by year

table <- pfirs %>% 
  st_drop_geometry() %>% 
  group_by(Year) %>% 
  summarize(ac = sum(Acres_Burned)) %>% 
  mutate(source = "pfirs")
table
## # A tibble: 3 × 3
##   Year      ac source
##   <fct>  <dbl> <chr> 
## 1 2020  85037. pfirs 
## 2 2021  58527. pfirs 
## 3 2022  58111. pfirs
table_method0 <- cm %>% 
  st_drop_geometry() %>% 
  group_by(Year) %>% 
  summarize(ac = sum(TREATED_ACRES)) %>% 
  mutate(source = "cm") %>% 
  full_join(table)
## Joining with `by = join_by(Year, ac, source)`
table_method0
## # A tibble: 6 × 3
##   Year      ac source
##   <fct>  <dbl> <chr> 
## 1 2020  16904. cm    
## 2 2021  37178. cm    
## 3 2022  29722. cm    
## 4 2020  85037. pfirs 
## 5 2021  58527. pfirs 
## 6 2022  58111. pfirs

Plot

plot_method0 <- ggplot(table_method0 %>% group_by(source))+
  geom_col(aes(x = Year, y = ac, group = source, fill = source))+
  ylab("acres")+
  ggtitle("No duplicates removed")+
  theme_minimal()+
  guides(fill = "none")+
  ylim(c(0,110000))
plot_method0

Find duplicates

Rename columns with identifier for source

pfirs_buff_dups <- pfirs_buff %>% 
  rename(Year_pfirs = Year) %>% 
  rename(Date_pfirs = Burn_Date) %>% 
  rename(ID_PFIRS_CT = ID_CT) %>% 
  rename(acres_pfirs = Acres_Burned) %>% 
  rename(Agency_pfirs = Agency)
cm_dups <- cm %>% 
  ungroup %>% 
  rename(Year_cm = Year) %>% 
  rename(ACTIVITY_START_cm = ACTIVITY_START) %>% 
  rename(ACTIVITY_END_cm = ACTIVITY_END) %>% 
  rename(ID_CM_Trt = AQ_ID) %>% 
  rename(acres_cm = TREATED_ACRES) %>% 
  rename(Agency_cm = AGENCY_NAME) %>% 
  rename()

Spatial overlay

dups <- st_intersection(pfirs_buff_dups, cm_dups)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

Filter dups to only rows where dates are THE SAME between the two data sets

Filter to same year

dups <- dups %>% 
  filter(Year_pfirs == Year_cm)

Look at date differences

dups <- dups %>% 
  mutate(diff_dates_start = difftime(Date_pfirs, ACTIVITY_START_cm, units = "days")) %>% 
  mutate(diff_dates_end = difftime(Date_pfirs, ACTIVITY_END_cm, units = "days"))

Remove PFIRS points that meet the following criteria:

-  within the CAL FIRE date range
-  within 1 km of the same polygon

Date match

dups %>% 
  select(Burn_Unit, Date_pfirs, ACTIVITY_START_cm, ACTIVITY_END_cm)
## Simple feature collection with 482 features and 4 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -13835290 ymin: 3865348 xmax: -12974070 ymax: 5150342
## Projected CRS: WGS 84 / Pseudo-Mercator
## # A tibble: 482 × 5
##    Burn_Unit  Date_pfirs          ACTIVITY_START_cm ACTIVITY_END_cm
##    <chr>      <dttm>              <date>            <date>         
##  1 GHR 6      2021-02-04 00:00:00 2021-11-29        2021-12-09     
##  2 GHR 6      2021-02-18 00:00:00 2021-11-29        2021-12-09     
##  3 GHR 4      2021-03-08 00:00:00 2021-11-29        2021-12-09     
##  4 GHR 1      2021-10-25 00:00:00 2021-11-29        2021-12-09     
##  5 GHR 3      2021-10-27 00:00:00 2021-11-29        2021-12-09     
##  6 GHR 4      2021-10-28 00:00:00 2021-11-29        2021-12-09     
##  7 CRG 4      2021-11-02 00:00:00 2021-11-29        2021-12-09     
##  8 Gun 134-70 2021-11-03 00:00:00 2021-11-29        2021-12-09     
##  9 CRG 4      2021-11-03 00:00:00 2021-11-29        2021-12-09     
## 10 Gun 138-70 2021-11-04 00:00:00 2021-11-29        2021-12-09     
## # ℹ 472 more rows
## # ℹ 1 more variable: geometry <GEOMETRY [m]>
dups_daily <- dups %>% 
  filter(Date_pfirs >= ACTIVITY_START_cm & Date_pfirs <= ACTIVITY_END_cm) 

Check

dups_daily %>% 
  select(Date_pfirs, ACTIVITY_START_cm, ACTIVITY_END_cm) %>% 
  st_drop_geometry() %>% 
  tail()
## # A tibble: 6 × 3
##   Date_pfirs          ACTIVITY_START_cm ACTIVITY_END_cm
##   <dttm>              <date>            <date>         
## 1 2022-10-26 00:00:00 2022-10-25        2022-10-26     
## 2 2022-10-04 00:00:00 2022-10-04        2022-10-05     
## 3 2022-10-04 00:00:00 2022-10-04        2022-10-05     
## 4 2022-11-25 00:00:00 2022-11-25        2022-11-25     
## 5 2022-10-27 00:00:00 2022-10-27        2022-10-27     
## 6 2022-10-13 00:00:00 2022-10-13        2022-10-13

Reorder columns

dups_daily <- dups_daily %>% 
  select(Burn_Unit, ACTIVITY_NAME, Date_pfirs, ACTIVITY_START_cm, ACTIVITY_END_cm, acres_pfirs, acres_cm, Agency_cm, Agency_pfirs, Burn_Type_Simple, ACTIVITY_DESCRIPTION, Burn_Type_Simple, everything())
print(paste("There are ", nrow(dups_daily), "rows that are duplicated."))
## [1] "There are  242 rows that are duplicated."
print(paste("-  Of those, there are ", length(unique(dups_daily$ID_PFIRS_CT)), "unique PFIRS records"))
## [1] "-  Of those, there are  188 unique PFIRS records"
print(paste("-  and ", length(unique(dups_daily$ID_CM_Trt)), "unique CalMapper records"))
## [1] "-  and  131 unique CalMapper records"

Handle Duplicates

Method 1: Remove PFIRS points

Remove duplicates from PFIRS

pfirs_no_dups_daily <- pfirs %>% 
  filter(!ID_CT %in% dups_daily$ID_PFIRS_CT)

Combine pfirs and CalMapper

Sync column names

cm_merge <- cm %>% 
  rename(Agency = AGENCY_NAME,
          Acres_Burned = TREATED_ACRES,
         Burn_Type_Simple = ACTIVITY_DESCRIPTION) %>% 
  select(Data_source, TREATMENT_NAME, Acres_Burned, Burn_Type_Simple, ACTIVITY_START, ACTIVITY_END, Agency, everything()) %>% 
  mutate(gis_acres_treatment = as.numeric(st_area(cm)/4046.86)) %>% 
  st_drop_geometry()
pfirs_merge <- pfirs_no_dups_daily %>% 
  rename(TREATMENT_NAME = Burn_Unit) %>% 
  st_drop_geometry() %>% 
  select(Data_source, TREATMENT_NAME, Acres_Burned, Burn_Type_Simple, Burn_Date, Agency, Burn_Type, everything())

Merge

merged <- full_join(cm_merge, pfirs_merge)
## Joining with `by = join_by(Data_source, TREATMENT_NAME, Acres_Burned,
## Burn_Type_Simple, Agency, Year)`
nrow(pfirs_merge)+nrow(cm_merge) == nrow(merged)
## [1] TRUE
merged <- merged %>% 
  select(Data_source, TREATMENT_NAME, Acres_Burned, Burn_Type_Simple, ACTIVITY_START, ACTIVITY_END, Burn_Date, Agency, everything()) %>% 
  arrange(TREATMENT_NAME)

Save

write_excel_csv(merged, file = "excel/CalMapper_PFIRS_Method1.xls")

Plot acreage

table_method1 <- merged %>% 
  group_by(Year, Data_source) %>% 
  summarize(acres = sum(Acres_Burned)) 
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
plot_method1 <- table_method1 %>% 
  ggplot()+
  geom_col(aes(x = Year, y = acres, fill = Data_source))+
  theme_minimal()+
  ggtitle("Method 1")+
  ylim(c(0,110000))+
  guides(fill = "none")+
  ylab("")
plot_method1

Method 2: Remove whichever has the lower acreage

Sync column names

cm_dup <- dups_daily %>% 
  st_drop_geometry() %>% 
  select(ID_CM_Trt) %>% 
  distinct() 
cm_dup
## # A tibble: 131 × 1
##    ID_CM_Trt
##        <int>
##  1    216349
##  2    213186
##  3    224350
##  4     74270
##  5     98268
##  6    296099
##  7    241317
##  8    131270
##  9    292288
## 10    265633
## # ℹ 121 more rows

For the CalMapper polygons with multiple overlapping PFIRS points, add up the acreage in the PFIRS points. If the total acreage is greater than the treated acreage in CM, then delete the CM polygon. Otherwise, delete the PFIRS points.

count = 0
cm_merge <- cm %>% 
  ungroup() %>% 
  mutate(gis_acres_treatment = as.numeric(st_area(cm)/4046.86)) %>% 
  st_drop_geometry()
pfirs_merge <- pfirs %>% ungroup() %>% st_drop_geometry()
for(i in 1:nrow(cm_dup)){
  cm_ID_i <- cm_dup[i,1] %>% unlist()
  test <- dups_daily %>% filter(ID_CM_Trt == cm_ID_i)
  if(nrow(test)==1){
    count = count + 1 
    if(test$acres_cm >= test$acres_pfirs){
      pfirs_merge <- pfirs_merge %>% 
        filter(!ID_CT %in% test$ID_PFIRS_CT)
    } else{
      cm_merge <- cm_merge %>% 
        filter(AQ_ID != cm_ID_i)
      print("deleted CalMapper")
    }
  } else if (nrow(test)>1){
    if(test$acres_cm[1] >= sum(test$acres_pfirs)){
      pfirs_merge <- pfirs_merge %>% 
        filter(!ID_CT %in% test$ID_PFIRS_CT)
      print("deleted multiple PFIRS")
      print(test$ID_PFIRS_CT)
    } else{
      cm_merge <- cm_merge %>% 
        filter(AQ_ID != cm_ID_i)
      print("deleted CalMapper")
    }
  }
}
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 1074 1075 1076 1077
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 491 492 563 564
## [1] "deleted multiple PFIRS"
## [1] 902 903 904
## [1] "deleted multiple PFIRS"
## [1] 376 380
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 537 540 545
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 865 866 867 868
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 346 349
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 645 652 654
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 663 664
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 397 400 401
## [1] "deleted multiple PFIRS"
## [1] 456 464
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 688 691 693 706 750
## [1] "deleted multiple PFIRS"
## [1] 411 413
## [1] "deleted multiple PFIRS"
## [1] 411 413
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 1211 1220
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 1122 1132
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 1110 1111 1112 1114 1116
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 1086 1088
## [1] "deleted multiple PFIRS"
## [1] 1078 1079
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted multiple PFIRS"
## [1] 1128 1129
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
## [1] "deleted CalMapper"
print(paste("Out of all the duplicates,", count, "are 1:1 duplicates, and", nrow(cm_dup)-count, "CalMapper ID's are more complicated"))
## [1] "Out of all the duplicates, 82 are 1:1 duplicates, and 49 CalMapper ID's are more complicated"
print(paste("There were", nrow(cm)-nrow(cm_merge), "rows deleted from CalMapper"))
## [1] "There were 63 rows deleted from CalMapper"
print(paste("There were", nrow(pfirs)-nrow(pfirs_merge), "rows deleted from PFIRS"))
## [1] "There were 95 rows deleted from PFIRS"

Merge

cm_merge <- cm_merge %>% 
  rename(Agency = AGENCY_NAME,
          Acres_Burned = TREATED_ACRES,
         Burn_Type_Simple = ACTIVITY_DESCRIPTION) %>% 
  select(Data_source, TREATMENT_NAME, Acres_Burned, Burn_Type_Simple, ACTIVITY_START, ACTIVITY_END, Agency, everything()) %>% 
  st_drop_geometry()
pfirs_merge <- pfirs_merge %>% 
  rename(TREATMENT_NAME = Burn_Unit) %>% 
  st_drop_geometry() %>% 
  select(Data_source, TREATMENT_NAME, Acres_Burned, Burn_Type_Simple, Burn_Date, Agency, Burn_Type, everything())
merged_method2 <- full_join(cm_merge, pfirs_merge)
## Joining with `by = join_by(Data_source, TREATMENT_NAME, Acres_Burned,
## Burn_Type_Simple, Agency, Year)`
nrow(pfirs_merge)+nrow(cm_merge) == nrow(merged)
## [1] FALSE

Plot acreage

table_method2 <- merged_method2 %>% 
  group_by(Year, Data_source) %>% 
  summarize(acres = sum(Acres_Burned))
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
plot_method2 <- table_method2 %>% 
  ggplot()+
  geom_col(aes(x = Year, y = acres, fill = Data_source))+
  theme_minimal()+
  ggtitle("Method 2")+
  ylim(c(0,110000))+
  ylab("")
plot_method2

Method 3: Method 2 but with a 30-day buffer

Filter dups to only rows where dates are THE SAME between the two data sets

Filter to same year

dups_daily <- dups %>% 
  filter(Year_pfirs == Year_cm)

Look for examples of where a temporal buffer is needed

dups_daily_new <- dups_daily %>% 
  filter(Agency_pfirs != "US Forest Service") %>% 
  select(ID_PFIRS_CT, Burn_Unit, Date_pfirs, ACTIVITY_START_cm, ACTIVITY_END_cm, Agency_pfirs, acres_cm) %>% 
  filter(Date_pfirs < ACTIVITY_START_cm | Date_pfirs > ACTIVITY_END_cm) 
dups_daily_new %>% 
  mutate(acres_cm_GIS = st_area(dups_daily_new)/ 4046.86) %>% 
  st_drop_geometry() %>% 
  tail(n=10)
## # A tibble: 10 × 8
##    ID_PFIRS_CT Burn_Unit   Date_pfirs          ACTIVITY_START_cm ACTIVITY_END_cm
##          <int> <chr>       <dttm>              <date>            <date>         
##  1        1131 Winton Sch… 2022-10-31 00:00:00 2022-02-01        2022-02-28     
##  2        1194 Block 9B    2022-11-18 00:00:00 2022-10-31        2022-10-31     
##  3        1133 Block 9C    2022-10-31 00:00:00 2022-11-01        2022-11-01     
##  4        1194 Block 9B    2022-11-18 00:00:00 2022-11-01        2022-11-01     
##  5        1133 Block 9C    2022-10-31 00:00:00 2022-12-09        2022-12-09     
##  6        1194 Block 9B    2022-11-18 00:00:00 2022-12-09        2022-12-09     
##  7        1119 Burn 3      2022-10-24 00:00:00 2022-10-25        2022-10-26     
##  8        1188 2022-Johns… 2022-11-15 00:00:00 2022-11-17        2022-11-18     
##  9        1105 UNIT 7      2022-10-19 00:00:00 2022-10-06        2022-10-08     
## 10        1105 UNIT 7      2022-10-19 00:00:00 2022-10-18        2022-10-18     
## # ℹ 3 more variables: Agency_pfirs <chr>, acres_cm <dbl>, acres_cm_GIS [m^2]

Plot bar charts side-by-side

Add numeric labels

Method 0

table_method0 <- table_method0 %>% 
  select(Year, source, ac) %>% 
  group_by(Year) %>% 
  mutate(total = sum(ac) %>% round(-3))
table_method0
## # A tibble: 6 × 4
## # Groups:   Year [3]
##   Year  source     ac  total
##   <fct> <chr>   <dbl>  <dbl>
## 1 2020  cm     16904. 102000
## 2 2021  cm     37178.  96000
## 3 2022  cm     29722.  88000
## 4 2020  pfirs  85037. 102000
## 5 2021  pfirs  58527.  96000
## 6 2022  pfirs  58111.  88000

Method 1

table_method1 <- table_method1 %>% 
  group_by(Year) %>% 
  mutate(total = sum(acres) %>% round(-3))
table_method1
## # A tibble: 6 × 4
## # Groups:   Year [3]
##   Year  Data_source  acres total
##   <fct> <chr>        <dbl> <dbl>
## 1 2020  CalMapper   16904. 89000
## 2 2020  PFIRS       72567. 89000
## 3 2021  CalMapper   37178. 87000
## 4 2021  PFIRS       50002. 87000
## 5 2022  CalMapper   29722. 74000
## 6 2022  PFIRS       44734. 74000

Method 2

table_method2 <- table_method2 %>% 
  group_by(Year) %>% 
  mutate(total = sum(acres) %>% round(-3))
table_method2
## # A tibble: 6 × 4
## # Groups:   Year [3]
##   Year  Data_source  acres total
##   <fct> <chr>        <dbl> <dbl>
## 1 2020  CalMapper   11739. 95000
## 2 2020  PFIRS       83341. 95000
## 3 2021  CalMapper   35880. 87000
## 4 2021  PFIRS       51310. 87000
## 5 2022  CalMapper   23695. 75000
## 6 2022  PFIRS       51605. 75000
plot_method0 <- plot_method0+
  geom_text(data = table_method0, aes(x = Year, y = total, label = comma(total)), vjust = -0.5)
plot_method1 <- plot_method1+
  geom_text(data = table_method1, aes(x = Year, y = total, label = comma(total)), vjust = -0.5)
plot_method2 <- plot_method2+
  geom_text(data = table_method2, aes(x = Year, y = total, label = comma(total)), vjust = -0.5)
all_plots <- (plot_method0 | plot_method1 | plot_method2)
all_plots

ggsave("figures/duplicates_bar_charts.png", width = 12, height = 5, units = c("in"))
# Sample data
data <- data.frame(
  Category = c("A", "B", "C", "D"),
  Value = c(20, 35, 15, 40),
  Group = c("Group1", "Group1", "Group2", "Group2")
)

# Create the stacked bar plot
p <- ggplot(data, aes(x = Category, y = Value, fill = Group)) +
  geom_col(position = "stack") +  # Create the stacked bar plot
  geom_text(data = data, aes(x = Category, y = Value, label = Value), vjust = -0.5)  # Add number labels above the bars

# Adjust the label position (vjust) as needed for your plot
# vjust = -0.5 moves the labels above the bars, you can adjust it to your preference
# hjust can be used to align the labels left, right, or center within the bars

# Display the plot
print(p)

Write duplicates to shapefile

# dups_daily_esri <- dups_daily %>% 
  # rename(Obj = TREATMENT_OBJECTIVE) %>% 
  # select(-TREATMENT_SHAPE, -Data_source, -Data_source.1)
# st_write(dups_daily_esri, dsn = "shapefiles", layer = "duplicates_final", driver = "ESRI Shapefile", delete_layer = T)
# 
# dups_daily_point <- pfirs %>% 
#   filter(ID_CT %in% dups_daily$ID_PFIRS_CT) 
# 
# st_write(dups_daily_point, dsn = "shapefiles", layer = "duplicates_POINT", driver = "ESRI Shapefile", delete_layer = T)